#!/usr/bin/env python
# coding=utf-8
# __author__ = 'Yunchao Ling'

import re
import sys


# Matches the first "PMC<digits>.pdf" occurrence on a single line and captures
# the digits.  Written as a raw string so that "\d" and "\." reach the regex
# engine untouched instead of being treated as (invalid) string escapes.
_PMC_PDF_PATTERN = re.compile(r"^.*?PMC(\d*)\.pdf.*?$")


def extractPMCId(line):
    """Return the digits of the first ``PMC<digits>.pdf`` token in *line*.

    :param line: a single line of text to scan.
    :return: the digit string following ``PMC`` (without the ``PMC`` prefix
        or the ``.pdf`` suffix), or ``""`` when the line contains no such
        token.  ``""`` is also returned for a bare ``PMC.pdf`` because the
        digit group is allowed to be empty.
    """
    match = _PMC_PDF_PATTERN.match(line)
    if match is None:
        return ""
    return match.group(1)


# Keywords looked for in each line, in priority order: the first keyword found
# in the line decides the status code stored for that line's PMC id.
_STATUS_KEYWORDS = (
    ("been", 1),
    ("invalid", 2),
    ("retracted", 3),
    ("exception", 4),
)

# Status codes that are reported, with the label printed after the id.
# Code 1 ("been") is intentionally absent: those ids are not printed.
_REPORT_LABELS = {2: "invalid", 3: "retracted", 4: "exception"}


def _collectStatus(lines):
    """Map each PMC id found in *lines* to its most recent status code.

    Lines without a PMC id, or with an id but none of the known keywords,
    leave the mapping untouched.  When an id appears on several lines, the
    last line that carries a keyword wins.

    :param lines: iterable of text lines (trailing whitespace is stripped).
    :return: dict of PMC id string -> status code (1-4).
    """
    status = {}
    for line in lines:
        line = line.rstrip()
        pmcId = extractPMCId(line)
        if pmcId == "":
            continue
        for keyword, code in _STATUS_KEYWORDS:
            if keyword in line:
                status[pmcId] = code
                break
    return status


if __name__ == "__main__":
    # Fail with a readable message instead of an IndexError traceback.
    if len(sys.argv) < 2:
        sys.exit("usage: %s INPUT_FILE" % sys.argv[0])

    # The context manager closes the file even if reading raises.
    with open(sys.argv[1], 'r') as infile:
        status = _collectStatus(infile)

    # Print "<id>:<label>" for every id whose final status is reportable.
    # The single-argument print(...) form behaves the same on Python 2 and 3.
    for key in status:
        label = _REPORT_LABELS.get(status[key])
        if label is not None:
            print(key + ":" + label)




